explore

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(purrr)
library(xml2)
library(ggplot2)
library(shiny)
################################################################################
# extract variables in the IRS990ScheduleD node from the given file
################################################################################
# output is a dataframe with the filename and name of each variable extracted

collect_sched_d_vars <- function(input_file) {
  example_file <- xml2::read_xml(input_file)%>% xml_ns_strip()

  # look at all variable names in schedule D
  IRS990ScheduleD_vars <- example_file %>% 
    xml_find_all("//ReturnData//IRS990ScheduleD")  %>%
    xml_contents() %>% 
    xml_path() 
  
  # look at variable names of children nodes of the schedule D nodes
  IRS990ScheduleD_var_children <- IRS990ScheduleD_vars %>% 
    map(~xml_find_all(example_file,
                      xpath = gsub("/", 
                                   "//" ,
                                   .x, 
                                   fixed = TRUE) ) %>%
          xml_children() %>% 
          xml_path()) %>%
    unlist()
  
    # include both first level schedule D nodes and children nodes
    vars <- c(IRS990ScheduleD_vars, IRS990ScheduleD_var_children)
    
    tibble(variables = vars,
           filename = input_file)
}

# file paths in the directory
files <- dir( "/Users/niezhen/Desktop/Spring 2023/SDS 410/ballet_990_released_20230208",
               full.names = TRUE)

# iterate over all files and extract variables present
# output is a data frame with a file name column and variable name column
all_vars <- map_df(files, collect_sched_d_vars)

all_vars
# after seeing the final table in the data_dictionary counting each variables's occurence, I think it can be nice to add more variables to the endowment data. Liza also mentioned that we can try to trace building endowment change over time, so I think I can try to add BuidingGrp variable into the endowment_data

# Find all variable names that include building information


all_vars <- map_df(files, collect_sched_d_vars)

to_add <- c()

for(row in 1:nrow(all_vars)){
  if(grepl("OtherLandBuildingsGrp", all_vars[row, ]$variables) | grepl("BuildingsGrp", all_vars[row, ]$variables)){
    to_add <- append(to_add, all_vars[row, ]$variables)
  }
}

to_add<-unique(to_add)
to_add
##  [1] "/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp"                              
##  [2] "/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/OtherCostOrOtherBasisAmt"     
##  [3] "/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/DepreciationAmt"              
##  [4] "/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/BookValueAmt"                 
##  [5] "/Return/ReturnData/IRS990ScheduleD/BuildingsGrp"                                       
##  [6] "/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/OtherCostOrOtherBasisAmt"              
##  [7] "/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/DepreciationAmt"                       
##  [8] "/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/BookValueAmt"                          
##  [9] "/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/InvestmentCostOrOtherBasisAmt"
## [10] "/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/InvestmentCostOrOtherBasisAmt"

Buildings Grouop Explanation from https://www.irs.gov/pub/irs-pdf/i990sd.pdf

Part VI. Land, Buildings, and Equipment Complete Part VI if the organization answered “Yes” on Form 990, Part IV, line 11a, and reported an amount on Form 990, Part X, line 10a. Reporting is required if any amount other than zero is reported on those lines.

Column (a). Enter the cost or other basis of all land, buildings, leasehold improvements, equipment, and other fixed assets held for investment purposes, such as rental properties.

Column (b). Enter the cost or other basis of all other land, buildings, leasehold improvements, equipment, and other fixed assets held for other than investment purposes, including any land, buildings, and equipment owned and used by the organization in conducting its exempt activities. The total amounts reported in columns (a) and (b) must equal the amount reported on Form 990, Part X, line 10a.

Column (c). Enter the accumulated depreciation recorded for the assets listed in columns (a) and (b). Don’t enter an amount in column (c) for line 1a, Land. The total of column (c) must equal the amount reported on Form 990, Part X, line 10b.

Column (d). Enter the sum of column (a) and column (b) minus column (c).

##From Rose's get_endowment function
get_endowment <- function(filename) {
  
  # Retrieving the same endowment information for all 
  variables <- c("//Return//ReturnHeader//ReturnTs", 
                 "//Return//ReturnHeader//Filer//EIN", 
                 "//Return//ReturnData//IRS990//DonorRstrOrQuasiEndowmentsInd",
                 "//Return//ReturnData//IRS990ScheduleD//CYEndwmtFundGrp//BeginningYearBalanceAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYEndwmtFundGrp//ContributionsAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYEndwmtFundGrp//InvestmentEarningsOrLossesAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYEndwmtFundGrp//OtherExpendituresAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYEndwmtFundGrp//EndYearBalanceAmt",
                 
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus1YrEndwmtFundGrp//BeginningYearBalanceAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus1YrEndwmtFundGrp//ContributionsAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus1YrEndwmtFundGrp//InvestmentEarningsOrLossesAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus1YrEndwmtFundGrp//OtherExpendituresAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus1YrEndwmtFundGrp//EndYearBalanceAmt",
                
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus2YrEndwmtFundGrp//BeginningYearBalanceAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus2YrEndwmtFundGrp//ContributionsAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus2YrEndwmtFundGrp//InvestmentEarningsOrLossesAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus2YrEndwmtFundGrp//OtherExpendituresAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus2YrEndwmtFundGrp//EndYearBalanceAmt",
                 
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus3YrEndwmtFundGrp//BeginningYearBalanceAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus3YrEndwmtFundGrp//ContributionsAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus3YrEndwmtFundGrp//InvestmentEarningsOrLossesAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus3YrEndwmtFundGrp//OtherExpendituresAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus3YrEndwmtFundGrp//EndYearBalanceAmt",
                 
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus4YrEndwmtFundGrp//BeginningYearBalanceAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus4YrEndwmtFundGrp//ContributionsAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus4YrEndwmtFundGrp//InvestmentEarningsOrLossesAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus4YrEndwmtFundGrp//OtherExpendituresAmt",
                 "//Return//ReturnData//IRS990ScheduleD//CYMinus4YrEndwmtFundGrp//EndYearBalanceAmt",
                 
                 "//Return//ReturnData//IRS990ScheduleD//BoardDesignatedBalanceEOYPct",
                 "//Return//ReturnData//IRS990ScheduleD//PrmnntEndowmentBalanceEOYPct",
                 "//Return//ReturnData//IRS990ScheduleD//TermEndowmentBalanceEOYPct",
                 "//Return//ReturnData//IRS990ScheduleD//EndowmentsHeldUnrelatedOrgInd",
                 "//Return//ReturnData//IRS990ScheduleD//EndowmentsHeldRelatedOrgInd",
                 
                 # adding BuildingGrp and OtherLandBuildingsGrp
                 "/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/OtherCostOrOtherBasisAmt",     
                 "/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/DepreciationAmt",             
                 "/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/BookValueAmt",                 
                 "/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/OtherCostOrOtherBasisAmt",              
                 "/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/DepreciationAmt",                       
                 "/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/BookValueAmt",                          
                 "/Return/ReturnData/IRS990ScheduleD/OtherLandBuildingsGrp/InvestmentCostOrOtherBasisAmt",
                  "/Return/ReturnData/IRS990ScheduleD/BuildingsGrp/InvestmentCostOrOtherBasisAmt",         
                 
                 "//AmendedReturnInd",
                 "//Return//ReturnHeader//ReturnTypeCd"
                 )
  
  # Column name; order matters, needs to align with retrieval order
  variables_no_path <- c("ReturnTs", 
                         "EIN",
                         "DonorRstrOrQuasiEndowmentsInd",
                        "CYBeginningYearBalanceAmt", 
                        "CYContributionsAmt", 
                        "CYInvestmentEarningsOrLossesAmt",
                        "CYOtherExpendituresAmt",
                        "CYEndYearBalanceAmt",
                        
                        "CYM1BeginningYearBalanceAmt", 
                        "CYM1ContributionsAmt", 
                        "CYM1InvestmentEarningsOrLossesAmt",
                        "CYM1OtherExpendituresAmt",
                        "CYM1EndYearBalanceAmt",
                        
                        "CYM2BeginningYearBalanceAmt", 
                        "CYM2ContributionsAmt", 
                        "CYM2InvestmentEarningsOrLossesAmt",
                        "CYM2OtherExpendituresAmt",
                        "CYM2EndYearBalanceAmt",
                        
                        "CYM3BeginningYearBalanceAmt", 
                        "CYM3ContributionsAmt", 
                        "CYM3InvestmentEarningsOrLossesAmt",
                        "CYM3OtherExpendituresAmt",
                        "CYM3EndYearBalanceAmt",
                        
                        "CYM4BeginningYearBalanceAmt", 
                        "CYM4ContributionsAmt", 
                        "CYM4InvestmentEarningsOrLossesAmt",
                        "CYM4OtherExpendituresAmt",
                        "CYM4EndYearBalanceAmt",
                        
                        "BoardDesignatedBalanceEOYPct",
                        "PrmnntEndowmentBalanceEOYPct",
                        "TermEndowmentBalanceEOYPct",
                        "EndowmentsHeldUnrelatedOrgInd",
                        "EndowmentsHeldRelatedOrgInd",
                        
                         "OtherLandBuildingsGrp_OtherCostOrOtherBasisAmt",     
                         "OtherLandBuildingsGrp_DepreciationAmt",             
                         "OtherLandBuildingsGrp_BookValueAmt",                 
                         "BuildingsGrp_OtherCostOrOtherBasisAmt",              
                         "BuildingsGrp_DepreciationAmt",                       
                         "BuildingsGrp_BookValueAmt",                          
                         "OtherLandBuildingsGrp_InvestmentCostOrOtherBasisAmt",
                        "BuildingsGrp_InvestmentCostOrOtherBasisAmt",         
                        
                        "AmendedReturnInd",
                        "ReturnTypeCd"
                        )
  
  xml_file <- read_xml(filename)
  xml_file <- xml_ns_strip(xml_file)
  
  # extract each variable; if it isn't present, put NA 
  extracted <- map(variables, ~{
    value <- xml_find_all(
      xml_file, 
      xpath =.x)
    value <- ifelse(length(value) ==0, 
                    NA, 
                    xml_text(value)) })
  
   names(extracted) <- variables_no_path
   
   extracted <- extracted %>%
     as_tibble()
}
##Applying get_endowment to entire output 
files <- dir( "/Users/niezhen/Desktop/Spring 2023/SDS 410/ballet_990_released_20230208",
              full.names = TRUE)

endowment_data_building <- map_df(files, ~
                     get_endowment(.x)) 

all_data<-readRDS("./data/data_990.RDS")
filter_data <- all_data %>%
  filter(ReturnTypeCd == "990" | ReturnTypeCd == "990EZ") %>%
  mutate(amended_return_filled = ifelse(!is.na(AmendedReturnInd),1, 0)) %>%
  group_by(EIN, fiscal_year) %>%
  slice_max(order_by =amended_return_filled, n = 1) %>%
  ungroup()
##Retriving Ts and EINs for filtered data 
filter_ids <- filter_data %>%
  select(ReturnTs, EIN)
## Adjusting data type, filtering to proper 990s
endowment_data_building <- endowment_data_building %>%
  mutate(ReturnDate = as.Date(ReturnTs,
                              format = "%Y-%m-%d")) %>%
  mutate(across(CYBeginningYearBalanceAmt:TermEndowmentBalanceEOYPct,
                as.numeric)) %>%
  mutate(across(c(EndowmentsHeldRelatedOrgInd, EndowmentsHeldUnrelatedOrgInd, DonorRstrOrQuasiEndowmentsInd),
                ~ifelse(.x == "true" | .x == "1", TRUE, FALSE))) %>%
  
  filter(ReturnTypeCd == "990" | ReturnTypeCd == "990EZ") %>%
  right_join(filter_ids, by = c("ReturnTs", "EIN")) %>%
  select(-c(ReturnTypeCd,AmendedReturnInd)) #Removing columns needed for filtering

saveRDS(endowment_data_building, "./data/endowment_filtered_buildings.RDS")

Preliminary Analysis of PartIV Data

Term explained: Book value is an accounting term used for both a measure of a business’s equity and the value of an asset as it appears on a balance sheet. As time goes on, the cost stays the same, but the accumulated depreciation increases, so the book value decreases.

# filter out all data each year
data_each_year<-list(
  data_2015 <- filter(endowment_data_building, grepl("2015", ReturnTs)),
  data_2016 <- filter(endowment_data_building, grepl("2016", ReturnTs)),
  data_2017 <- filter(endowment_data_building, grepl("2017", ReturnTs)),
  data_2018 <- filter(endowment_data_building, grepl("2018", ReturnTs)),
  data_2019 <- filter(endowment_data_building, grepl("2019", ReturnTs)),
  data_2020 <- filter(endowment_data_building, grepl("2020", ReturnTs)),
  data_2021 <- filter(endowment_data_building, grepl("2021", ReturnTs)),
  data_2022 <- filter(endowment_data_building, grepl("2022", ReturnTs))
)
BookValueSum <- list()

# restore each year's sum for each EIN's buildings bookvalue 
for(i in c(1:8)){
  data = data_each_year[[i]]
  df <- data.frame(data %>% 
    filter(!is.na(BuildingsGrp_BookValueAmt)) %>% 
    mutate_at(c("BuildingsGrp_BookValueAmt", "BuildingsGrp_DepreciationAmt"), as.numeric) %>% 
    group_by(EIN) %>% 
    summarise(
      BookValueSum = sum(BuildingsGrp_BookValueAmt)
    ) %>% 
    mutate(year = 2014+i,
           EIN = as.numeric(EIN)))
           
  print(df)
  BookValueSum[[i]] = df
}
##         EIN BookValueSum year
## 1  10372997        91566 2015
## 2 131882106       139511 2015
## 3 581047778      7809407 2015
## 4 581440788      7625797 2015
## 5 621018942      1879384 2015
## 6 640732185       322271 2015
##          EIN BookValueSum year
## 1   10420070      1256986 2016
## 2   42312734      2881261 2016
## 3   50377245       200813 2016
## 4  131882106        74507 2016
## 5  132584273     24421710 2016
## 6  132642091      4899303 2016
## 7  132685755      9749028 2016
## 8  133307859        71215 2016
## 9  161328541      1140339 2016
## 10 210732575      1191342 2016
## 11 231629970      5825676 2016
## 12 237161084       184378 2016
## 13 237247009       241299 2016
## 14 310858562      5859803 2016
## 15 341645238            0 2016
## 16 364009741     17962769 2016
## 17 382026127      8009127 2016
## 18 391134735       283053 2016
## 19 455395602            0 2016
## 20 510394850        40525 2016
## 21 520846173      2996016 2016
## 22 581314711      6537763 2016
## 23 581420599       883662 2016
## 24 581440788      7549349 2016
## 25 630505056       800205 2016
## 26 640732185       300367 2016
## 27 730667485      7436920 2016
## 28 741394920     34192516 2016
## 29 746060386      4684505 2016
## 30 841150857       124075 2016
## 31 846038137      5747870 2016
## 32 910897129            0 2016
## 33 930765746       735820 2016
## 34 931009305      2018914 2016
## 35 941415298     17745677 2016
## 36 942427112      3202747 2016
##          EIN BookValueSum year
## 1   10420070      1208788 2017
## 2   42312734      3077240 2017
## 3   50377245       168954 2017
## 4  131882106         9502 2017
## 5  132584273     23583806 2017
## 6  132642091      4632021 2017
## 7  132685755      9551656 2017
## 8  133307859       -43464 2017
## 9  231629970      5693817 2017
## 10 237161084       163240 2017
## 11 237247009       233154 2017
## 12 260868264            0 2017
## 13 341645238            0 2017
## 14 364009741     17426567 2017
## 15 382026127      3697938 2017
## 16 391134735       276806 2017
## 17 520846173      2852613 2017
## 18 581047778      7578228 2017
## 19 581314711      6258733 2017
## 20 581420599       850744 2017
## 21 581440788      7275576 2017
## 22 581891235       293053 2017
## 23 621018942      1784670 2017
## 24 630505056       792478 2017
## 25 630813626      1267425 2017
## 26 640732185       281411 2017
## 27 730667485     11213840 2017
## 28 741394920     33242880 2017
## 29 746060386      4504252 2017
## 30 841150857        91874 2017
## 31 846038137      5627827 2017
## 32 910897129            0 2017
## 33 930765746       713164 2017
## 34 931009305        88534 2017
## 35 941415298     17232095 2017
## 36 942427112      3079701 2017
##          EIN BookValueSum year
## 1   10420070      1165283 2018
## 2   42312734      2841510 2018
## 3   50377245       163161 2018
## 4  131882106            0 2018
## 5  132584273     22743696 2018
## 6  132642091      4438155 2018
## 7  132685755     16346203 2018
## 8  133307859       -43464 2018
## 9  210732575       590236 2018
## 10 231629970      5560499 2018
## 11 237161084       734364 2018
## 12 237247009       225008 2018
## 13 260868264            0 2018
## 14 310858562      2054607 2018
## 15 341645238        45230 2018
## 16 364009741     16890365 2018
## 17 382026127      3490976 2018
## 18 391134735       257838 2018
## 19 436052680     23269913 2018
## 20 455395602            0 2018
## 21 520846173      2720036 2018
## 22 546049848      2979502 2018
## 23 581047778     14597136 2018
## 24 581314711      5979880 2018
## 25 581420599       817826 2018
## 26 581440788      6879884 2018
## 27 621018942      1690366 2018
## 28 630505056       339933 2018
## 29 630813626      1225063 2018
## 30 640732185       266738 2018
## 31 730667485     23021561 2018
## 32 741394920     32190211 2018
## 33 746060386      4298185 2018
## 34 841150857        74547 2018
## 35 846038137      5475088 2018
## 36 910897129            0 2018
## 37 930765746       690508 2018
## 38 941415298     16367527 2018
## 39 942427112      2950704 2018
##          EIN BookValueSum year
## 1   10420070      1099749 2019
## 2   42312734      6904875 2019
## 3   50377245       150609 2019
## 4  131882106            0 2019
## 5  132584273     44797058 2019
## 6  132642091      4751256 2019
## 7  133307859       -43464 2019
## 8  210732575       551524 2019
## 9  231629970      5791687 2019
## 10 237161084       734364 2019
## 11 237247009       216863 2019
## 12 310858562      3892306 2019
## 13 311784286          327 2019
## 14 341645238        42552 2019
## 15 364009741     16354163 2019
## 16 382026127      3297989 2019
## 17 391134735       390192 2019
## 18 436052680     22742528 2019
## 19 454864757            0 2019
## 20 455395602            0 2019
## 21 520846173      2569933 2019
## 22 546049848      4630690 2019
## 23 581314711      5705452 2019
## 24 581420599       785970 2019
## 25 581440788      6635595 2019
## 26 581891235       131159 2019
## 27 621018942     14865433 2019
## 28 630813626      2320424 2019
## 29 640732185       245008 2019
## 30 741394920     31209056 2019
## 31 746060386      4071036 2019
## 32 841150857        68477 2019
## 33 846038137      5333660 2019
## 34 910897129            0 2019
## 35 930765746       667851 2019
## 36 941415298     15338838 2019
## 37 942427112      2819259 2019
##          EIN BookValueSum year
## 1   10420070      1034653 2020
## 2   42312734      6605314 2020
## 3   50377245       376526 2020
## 4  131882106            0 2020
## 5  132584273     43561723 2020
## 6  132642091      5052565 2020
## 7  132685755      6895294 2020
## 8  133307859       -43464 2020
## 9  210732575       530797 2020
## 10 231629970      6530935 2020
## 11 237003520      3222367 2020
## 12 237161084       734364 2020
## 13 237247009       478976 2020
## 14 237424849            0 2020
## 15 364009741     15817960 2020
## 16 436052680     22215144 2020
## 17 454864757            0 2020
## 18 455395602            0 2020
## 19 520846173      2504039 2020
## 20 541244590            0 2020
## 21 542124670            0 2020
## 22 546049848      8865577 2020
## 23 581047778      7266609 2020
## 24 581314711      5426052 2020
## 25 581420599       753491 2020
## 26 581440788      6391306 2020
## 27 581891235       125012 2020
## 28 616033779       969035 2020
## 29 621018942     28353167 2020
## 30 630813626      1072333 2020
## 31 640732185       223496 2020
## 32 730667485     10772985 2020
## 33 741394920     30201871 2020
## 34 746060386      3896678 2020
## 35 841150857        63367 2020
## 36 846038137      5207587 2020
## 37 910897129            0 2020
## 38 930765746      1790522 2020
## 39 941415298     14305027 2020
## 40 942427112      2704888 2020
##          EIN BookValueSum year
## 1   10420070       971576 2021
## 2   42312734      6066501 2021
## 3   50377245       125508 2021
## 4  131882106            0 2021
## 5  132584273     42044504 2021
## 6  132642091      5101064 2021
## 7  132685755      6653368 2021
## 8  133307859       -43464 2021
## 9  210732575       507717 2021
## 10 231629970      6422464 2021
## 11 237003520      3222367 2021
## 12 237161084       734364 2021
## 13 237247009       225913 2021
## 14 310858562      1837699 2021
## 15 311784286            0 2021
## 16 364009741     15281758 2021
## 17 382026127      3088395 2021
## 18 454864757            0 2021
## 19 461513558            0 2021
## 20 520846173      1298710 2021
## 21 542124670            0 2021
## 22 581047778      7037437 2021
## 23 581314711      5154185 2021
## 24 581891235       118865 2021
## 25 616033779       995302 2021
## 26 730667485     10324836 2021
## 27 741394920     29194686 2021
## 28 746060386      3648651 2021
## 29 841150857        53789 2021
## 30 841622654      1644891 2021
## 31 846038137      5047657 2021
## 32 910897129            0 2021
## 33 941415298     13212979 2021
## 34 942427112      5011759 2021
## 35 943197247      3246223 2021
##         EIN BookValueSum year
## 1 132685755      6411442 2022
## 2 930765746      1102719 2022
#  convert the list into dataframe
bookvalues <- do.call(rbind, BookValueSum)

I notice that bookvalue of 131882106 turned to 0 in 2018

# visualization of overall book value
ggplot(bookvalues, aes(x = year, y = BookValueSum, color = as.factor(EIN))) +
  geom_line()+
  theme_bw()+
  theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
        axis.title = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
        axis.text.x = element_text(size = 10),
        strip.text = element_text(face="bold",size = 5)) +
  labs(y = "Schedule D Sum of Buildings Book Value",
       x = "Year",
       title = "Schedule D Sum of Buildings Book Value Change",
       subtitle = "By Fiscal Year")

# separate EIN with medium and low building book value according to previous graph
# high
bookvalues %>% 
  filter(BookValueSum >= 1e+07) %>% 
  ggplot(aes(x = year, y = BookValueSum, color = as.factor(EIN))) +
  geom_line()+
  theme_bw()+
  theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
        axis.title = element_text(size = 10, face = "bold"),
        plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
        axis.text.x = element_text(size = 10),
        strip.text = element_text(face="bold",size = 5)) +
  labs(y = "Schedule D Sum of Buildings Book Value",
       x = "Year",
       title = "Schedule D Sum of Buildings Book Value Change (High)",
       subtitle = "By Fiscal Year")

# medium
bookvalues %>% 
  filter(BookValueSum < 1e+07 & BookValueSum > 2.5e+06) %>% 
  ggplot(aes(x = year, y = BookValueSum, color = as.factor(EIN))) +
  geom_line()+
  theme_bw() +
  theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
        axis.title = element_text(size = 10, face = "bold"),
        plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
        axis.text.x = element_text(size = 10),
        strip.text = element_text(face="bold",size = 5)) +
  labs(y = "Schedule D Sum of Buildings Book Value",
       x = "Year",
       title = "Schedule D Sum of Buildings Book Value Change (Medium)",
       subtitle = "By Fiscal Year")

# low
bookvalues %>% 
  filter(BookValueSum < 2.5e+06) %>% 
  ggplot(aes(x = year, y = BookValueSum, color = as.factor(EIN))) +
  geom_line()+
  theme_bw() +
  theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
        axis.title = element_text(size = 10, face = "bold"),
        plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
        axis.text.x = element_text(size = 10),
        strip.text = element_text(face="bold",size = 5)) +
  labs(y = "Schedule D Sum of Buildings Book Value",
       x = "Year",
       title = "Schedule D Sum of Buildings Book Value Change (Low)",
       subtitle = "By Fiscal Year")

The book value will decrease because of depreciation, so the normal trend of an EIN from previous visualization should be a slight downward line. I will filter out EIN with a bizarre trend that should be : 1) Abruptly going down 2) going up at any point 3) appear less than 3 years

(Currently I’m just doing this by eyeballing but I will write code for it later)

Caveat: ways of splitting up split up by quantile add geom_point

#dataframe with higher Book Values
high <- bookvalues %>% 
  filter(BookValueSum > 1e+07)

weird EIN: 132584273, 621018942

medium <-bookvalues %>% 
  filter(BookValueSum < 1e+07 & BookValueSum > 2.5e+06)

weird EIN: 546049868

low <- bookvalues %>% 
  filter(BookValueSum < 1e+07) 

10372997